# Run preamble
source("code/preamble.R")

# Load data
# Each study's data set is read from its own serialized RDS file under data/.
# The objects are subset and combined in the sections below.
study1_data <- readRDS("data/study1_data.rds")
study2_data <- readRDS("data/study2_data.rds")
study3_data <- readRDS("data/study3_data.rds")
study4_data <- readRDS("data/study4_data.rds")
study5_data <- readRDS("data/study5_data.rds")

#####################
### PLOT PLACEBOS ###
#####################

# Build the long-format placebo data from Study 4: keep the two placebo
# conditions, stack the two distribution columns (one row per response and
# group), and give each group a readable label that depends on the question
placebo_dist_data <- study4_data %>%
  subset(condition %in% c("Red Sox vs. Yankees", "Blue vs. Pink")) %>%
  select(Question = condition, american_yankees_dist, canadian_sox_dist) %>%
  pivot_longer(cols = ends_with("dist"), names_to = "Group") %>%
  mutate(
    Group = case_when(
      Question == "Blue vs. Pink" ~ case_when(
        Group == "american_yankees_dist" ~ "Americans",
        Group == "canadian_sox_dist" ~ "Canadians"
      ),
      Question == "Red Sox vs. Yankees" ~ case_when(
        Group == "american_yankees_dist" ~ "Die-Hard Yankees Fans",
        Group == "canadian_sox_dist" ~ "Die-Hard Red Sox Fans"
      )
    )
  )

# Each response is a comma-separated string; spread it over eleven columns
# (bin_0 ... bin_10), which are still character at this point
placebo_dist_data[paste0("bin_", 0:10)] <- str_split_fixed(
  placebo_dist_data$value, ",",
  n = 11
)

# Make the bins numeric, then average every bin within each Question/Group
# pair (missing values are ignored)
placebo_dist_data <- placebo_dist_data %>%
  mutate(across(bin_0:bin_10, as.numeric)) %>%
  group_by(Question, Group) %>%
  summarise(
    across(
      starts_with("bin_"),
      function(tokens) mean(tokens, na.rm = TRUE)
    )
  )

# Plot placebos

# Draw the overlaid area chart for one placebo question.
#
# Arguments:
#   dist_data      Summarized placebo data with columns Question, Group and
#                  bin_0 ... bin_10 (one row per Question/Group pair).
#   question_label Value of Question whose rows are plotted.
#   fill_values    Fill colours handed to scale_fill_manual(); name the
#                  elements by Group so that the colour assignment does not
#                  depend on the alphabetical order of the group labels.
#   x_label        Title of the x axis.
#   y_label        Title of the y axis.
#
# Returns a ggplot object with the bin (0-10) on the x axis and the average
# number of tokens on the y axis, one translucent area per Group.
plot_placebo_dist <- function(dist_data, question_label, fill_values,
                              x_label, y_label) {
  dist_data %>%
    filter(Question == question_label) %>%
    pivot_longer( # One row per Group and bin
      cols = starts_with("bin_"),
      names_prefix = "bin_",
      names_to = "bin",
      values_to = "avg_freq"
    ) %>%
    mutate(bin = as.numeric(bin)) %>%
    ggplot(aes(x = bin, y = avg_freq, linetype = Group, fill = Group)) +
    # A dodge of width 0 overlays the areas instead of stacking them
    geom_area(position = position_dodge(width = 0), alpha = .3) +
    ylim(0, 14) + # Shared y range keeps the two placebo panels comparable
    scale_fill_manual(values = fill_values) +
    labs(x = x_label, y = y_label) +
    theme_bw() +
    theme( # Legend in the top-left corner of the panel
      legend.justification = c(0, 1),
      legend.position = c(0, 1),
      legend.background = element_rect(fill = "transparent")
    )
}

sox_yankees_plot <- plot_placebo_dist(
  placebo_dist_data,
  question_label = "Red Sox vs. Yankees",
  fill_values = c(
    "Die-Hard Red Sox Fans" = "#BD3039",
    "Die-Hard Yankees Fans" = "#003087"
  ),
  x_label = "<< Red Sox are Better ... Yankees are Better >>",
  y_label = "Average Number of Tokens"
)

blue_pink_plot <- plot_placebo_dist(
  placebo_dist_data,
  question_label = "Blue vs. Pink",
  fill_values = c("Americans" = "#0A3161", "Canadians" = "#FF0000"),
  x_label = "<< Prefer Blue ... Prefer Pink >>",
  y_label = "" # Sits to the right of the Red Sox panel, which has the title
)

#########################
### PLOT DESCRIPTIVES ###
#########################

# Study 2: keep only the "dist_no_incen" condition
study2_data <- subset(study2_data, condition == "dist_no_incen")

# Study 3: keep the conditions whose name starts with "dist_"
study3_data <- subset(study3_data, grepl("^dist_", condition))

# Study 4: drop participants without a party ID (Independents) and the two
# placebo conditions, then store the remaining condition as an ordered
# `policy` factor
study4_data <- study4_data %>%
  subset(
    !(
      is.na(pid2) |
        condition == "Blue vs. Pink" |
        condition == "Red Sox vs. Yankees"
    )
  ) %>%
  mutate(
    policy = factor(
      as.character(condition),
      levels = c("Gun Control", "Border Control", "Abortion Access")
    )
  )

# Study 5: keep the conditions whose name starts with "dist_"
study5_data <- subset(study5_data, grepl("^dist_", condition))

# Actual distribution of positions in Study 1, expressed on the 20-token
# scale: the share of each party's respondents at every position (within a
# policy) multiplied by 20
actual_dists <- study1_data %>%
  group_by(pid2, policy, par_pos) %>%
  summarise(count = n()) %>% # Respondents per party, policy and position
  ungroup() %>%
  group_by(pid2, policy) %>%
  mutate(
    `Average Number of Tokens` = count / sum(count) * 20
  ) %>%
  ungroup() %>%
  select(Party = pid2, policy, par_pos, `Average Number of Tokens`) %>%
  mutate(Party = paste0(Party, "s")) # Pluralize the party label

# Stack the perceived-distribution columns of all five studies
perceived_dists <- list(
  study1_data, study2_data, study3_data, study4_data, study5_data
) %>%
  lapply(
    function(study_df) select(study_df, rep_dist, dem_dist, study, policy)
  ) %>%
  do.call(rbind, .)

# Average the perceived distributions: split every comma-separated
# distribution into one column per scale point, reshape to one row per
# response, party and scale point, and take the mean token count within each
# study, policy, party and scale point
perceived_dists <- perceived_dists %>%
  separate_wider_delim(
    cols = c(dem_dist, rep_dist),
    delim = ",",
    names_sep = "."
  ) %>%
  pivot_longer(
    cols = matches("dist"),
    names_sep = "\\.",
    names_to = c("Party", "<< Liberal Position ... Conservative Position >>"),
    values_to = "Tokens"
  ) %>%
  mutate(
    Party = case_when(
      Party == "dem_dist" ~ "Democrats",
      Party == "rep_dist" ~ "Republicans"
    ),
    # Pieces are numbered from 1; shift them so the scale runs from 0
    `<< Liberal Position ... Conservative Position >>` =
      as.numeric(`<< Liberal Position ... Conservative Position >>`) - 1,
    Tokens = as.numeric(Tokens)
  ) %>%
  group_by(
    Study = study,
    policy,
    Party,
    `<< Liberal Position ... Conservative Position >>`
  ) %>%
  summarize(`Average Number of Tokens` = mean(Tokens, na.rm = TRUE)) %>%
  ungroup()

# Descriptives panel: perceived distributions (translucent areas, one per
# study and party) with the actual Study 1 distributions drawn on top as
# lines, faceted by policy
descriptives_plot <- ggplot() +
  geom_area(
    data = perceived_dists,
    mapping = aes(
      x = `<< Liberal Position ... Conservative Position >>`,
      y = `Average Number of Tokens`,
      fill = Party,
      linetype = Study
    ),
    alpha = 0.075,
    # A dodge of width 0 overlays the areas instead of stacking them
    position = position_dodge(width = 0)
  ) +
  geom_line(
    data = actual_dists,
    mapping = aes(x = par_pos, y = `Average Number of Tokens`, color = Party)
  ) +
  facet_wrap(vars(policy)) +
  scale_colour_manual(values = c("dodgerblue3", "firebrick3")) +
  scale_fill_manual(values = c("dodgerblue3", "firebrick3")) +
  labs(x = "<< Liberal Position ... Conservative Position >>") +
  guides(color = "none", fill = "none", linetype = "none") + # No legends
  theme_bw()

##############
### FIGURE ###
##############

# Set up rows: panel A (descriptives) on top, panels B and C (placebos) below
row_1 <- ggarrange(descriptives_plot, labels = "A")
row_2 <- ggarrange(sox_yankees_plot, blue_pink_plot, labels = c("B", "C"))

# Arrange figure
figure_1 <- ggarrange(row_1, row_2, nrow = 2)
figure_1

# Save figure
# The figure is passed explicitly: without `plot`, ggsave() falls back to
# last_plot(), i.e. to implicit global state that depends on which plot
# happened to be created or displayed last.
ggsave(
  filename = "figures/descriptives_and_placebos.png",
  plot = figure_1,
  device = "png",
  width = 10,
  height = 6
)

# Print disclaimer
cat(
  "\n\n",
  " See descriptives_and_placebos.png for Figure 1."
)

#########################################################
### OVERLAP BETWEEN "TYPICAL" PERCEIVED DISTRIBUTIONS ###
#########################################################

# Generate unique pairs of studies (one row per pair, columns V1 and V2)
unique_study_pairs <- perceived_dists$Study %>%
  unique() %>%
  combn(., 2) %>%
  t() %>%
  as.data.frame()

# Reshape perceived distribution data: one column of average token counts
# per study, one row per policy, party and scale point
perceived_dists_wide <- perceived_dists %>%
  pivot_wider(
    names_from = Study,
    values_from = `Average Number of Tokens`
  )

# Calculate the pairwise overlap between perceived distributions for every
# study pair and stack the results (one row per pair, policy and party).
# The per-pair results are collected in a list and bound once instead of
# growing a data frame inside a loop, and seq_len() keeps the iteration
# empty when there are no pairs.
pair_overlaps <- seq_len(nrow(unique_study_pairs)) %>%
  lapply(function(i) {
    # Refer to the two study columns by name rather than by position
    study_a <- as.character(unique_study_pairs$V1[i])
    study_b <- as.character(unique_study_pairs$V2[i])

    perceived_dists_wide %>%
      mutate( # Calculate absolute difference at each scale point
        abs_diff = abs(.data[[study_a]] - .data[[study_b]])
      ) %>%
      group_by(policy, Party) %>% # Group by question
      # Overlap = 1 - (summed absolute difference / 40).
      # NOTE(review): 40 is presumably 2 x 20 tokens, the largest possible
      # summed difference between two 20-token distributions - confirm.
      # NOTE(review): a policy missing from one study of a pair yields NA
      # here, which would propagate into the mean and SD printed below -
      # verify that all studies cover the same policies.
      summarize(overlap = (40 - sum(abs_diff)) / 40, .groups = "drop")
  }) %>%
  bind_rows()

# Characterize pairwise overlaps between perceived distributions
cat(
  "\n\n",
  " MEAN of overlap between `typical' perceived distributions (across studies):",
  mean(pair_overlaps$overlap),
  "\n",
  " SD of overlap between `typical' perceived distributions (across studies):",
  sd(pair_overlaps$overlap)
)

############################################
### TABLES FOR SUPPLEMENTARY INFORMATION ###
############################################

# Table of moments for the "typical" perceived distributions: rebuild each
# averaged distribution as a comma-separated string, compute its mean and SD
# with the dist_mean() / dist_sd() helpers, and write one row per study and
# policy with separate columns for Democrats and Republicans
perceived_dists %>%
  pivot_wider( # One column per scale point (0-10)
    names_from = `<< Liberal Position ... Conservative Position >>`,
    values_from = `Average Number of Tokens`
  ) %>%
  unite(dist, all_of(as.character(0:10)), sep = ",") %>%
  # Grouping leaves one distribution per group for the helpers below
  group_by(Study, policy, Party) %>%
  mutate( # Calculate distribution mean and SD
    Mean = dist_mean(dist),
    SD = dist_sd(dist)
  ) %>%
  select(!dist) %>% # The distribution string is no longer needed
  pivot_wider( # Side-by-side columns for the two parties
    names_from = Party,
    values_from = c(Mean, SD),
    names_glue = "{.value} ({Party})"
  ) %>%
  select(
    Study, policy,
    `Mean (Democrats)`, `SD (Democrats)`,
    `Mean (Republicans)`, `SD (Republicans)`
  ) %>%
  rename(`Policy Issue` = policy) %>%
  datasummary_df(
    title = "Moments of ``Typical'' Perceived Distributions by Study",
    notes = "Note: Each ``typical'' distribution is generated by averaging the number of tokens participants collectively placed at each scale point.",
    output = "tables/moments_typical_perceived_dists.txt"
  )

# Table of moments for the placebo distributions: turn the averaged bins of
# each Question/Group pair back into a comma-separated string and compute
# its mean and SD with the dist_mean() / dist_sd() helpers
placebo_dist_data %>%
  group_by(Question, Group) %>%
  mutate(across(starts_with("bin_"), ~ as.character(.x))) %>%
  unite(dist, starts_with("bin_"), sep = ",") %>% # Rebuild distribution
  mutate(Mean = dist_mean(dist), SD = dist_sd(dist)) %>%
  select(Question, Group, Mean, SD) %>%
  datasummary_df(
    title = "Moments of Non-Political Perceived Distributions",
    notes = "This table includes data from Study 4.",
    output = "tables/moments_placebo_dists.txt"
  )
